#R Code for 
#Linking Policy Design and Policy Diffusion to Advance Both Theories: 
#Evidence from the Elements, Attributes, and Adoptions of 
#Uniform Law Commission Model Legislation
#Joshua M. Jansa and Daniel J. Mallinson
#Policy Studies Journal
#Last Updated 12/19/2024

# start from an empty global environment (including dot-prefixed objects)
rm(list = ls(all.names = TRUE))

### installing packages

install.packages("quanteda")
install.packages("readtext")
install.packages("quanteda.textstats")
install.packages("tm")
install.packages("readxl")
install.packages("haven")
install.packages("foreign")
install.packages("dplyr")
install.packages("forcats")
install.packages("ggplot2")
install.packages("survival")
install.packages("mirt")
install.packages("sjPlot")
install.packages("sjmisc")
install.packages("sjlabelled")
install.packages("tidyverse")
install.packages("rstatix")
install.packages("ggpubr")
install.packages("gplots")
install.packages("ggeffects")
install.packages("corrplot")

library(quanteda)
library(readtext)
library(quanteda.textstats)
library(tm)
library(readxl)
library(haven)
library(foreign)
library(dplyr)
library(forcats)
library(ggplot2)
library(survival)
library(mirt)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(tidyverse)
library(rstatix)
library(ggpubr)
library(gplots)
library(ggeffects)
library(corrplot)

# NOTE: hard-coded, machine-specific working directory. Every relative path
# used below is resolved against it, so edit this line before running the
# script on another machine.
setwd("/Users/jjansa/Desktop/PSJ Complexity")

### uploading text data
### .txt and .pdf sources live in separate folders and are read separately
ULCtexts <- readtext("ULC Texts/*.txt")
ULCpdfs <- readtext("ULC PDFs/*.pdf")

### transforming raw text data from txt and pdf into a corpus
### each source becomes its own corpus; the two are then combined into one
ULCcorp1 <- corpus(ULCtexts)
ULCcorp2 <- corpus(ULCpdfs)
ULCcorpus <- corpus(ULCcorp1 + ULCcorp2)

### tokenise the combined corpus
### token order (and therefore sentence structure) is kept;
### numbers, symbols, and URLs are removed.
ULCtokens <- tokens(
  ULCcorpus,
  remove_numbers = TRUE,
  remove_symbols = TRUE,
  remove_url = TRUE
)

# paste each document's tokens back into one space-separated string,
# then rebuild a corpus from those cleaned strings
ULCcorpus_clean <- corpus(
  vapply(
    ULCtokens,
    function(doc_tokens) paste(doc_tokens, collapse = " "),
    character(1)
  )
)

### per-document summary statistics via textstat_summary
### (the function accepts either a corpus or a dfm)
ULCsummary <- textstat_summary(ULCcorpus_clean)

#optional: view and save summary stats
View(ULCsummary)
write.csv(ULCsummary, file = "ULCsummary.csv")

### readability scores via textstat_readability
### (requires the text in corpus form)
ULCreadability <- textstat_readability(
  ULCcorpus_clean,
  measure = c("Flesch", "Dale.Chall.old")
)

# turn Flesch reading ease into reading difficulty: the document with the
# highest ease score maps to 0 and the one with the lowest maps to 100
flesch_hi <- max(ULCreadability$Flesch)
flesch_lo <- min(ULCreadability$Flesch)
ULCreadability$frd <- abs(
  100 - ((100 / (flesch_hi - flesch_lo)) *
           (ULCreadability$Flesch - flesch_hi) + 100)
)

#optional: view and save readability stats
View(ULCreadability)
write.csv(ULCreadability, file = "ULCreadability.csv")

#policy adoption data:
#years of adoption, number of adoptions, and policy categories
ULCpolicy <- read_excel("ULC_adoption.xlsx")

#optional: view the data
View(ULCpolicy)

#policy cascade data used for the Weibull speed scores
ulcnew <- read_excel("ULC_weibull.xlsx")

#drop the simultaneous death act
#(which() also discards rows whose policy is NA)
kept_rows <- which(ulcnew$policy != "simultaneous death act")
ulcnew <- ulcnew[kept_rows, ]

#duration counts years from first adoption, starting at 1
ulcnew$duration <- with(ulcnew, adopt_year - first_year + 1)
all.policies <- unique(ulcnew$policy)

#run weibull for all policies to obtain speed scores

# Fit an intercept-only Weibull survival model to one policy's rows of
# `ulcnew` and return a one-row data frame with:
#   weib.speed / weib.speed.se : model intercept and its standard error
#   weib.scale / weib.scale.se : log(1 / scale) and the standard error of the
#                                second variance term (log scale)
# Estimates are stored as numbers. The earlier cbind()-based version coerced
# every column to character, and grew the result with rbind() inside a
# 1:length() loop (which misbehaves when there are no policies).
fit_weibull_speed <- function(policy_name) {
  usepolicy <- ulcnew[which(ulcnew$policy == policy_name), ]
  output <- survreg(
    Surv(time = duration, event = adopt, type = "right") ~ 1,
    dist = "weibull",
    data = usepolicy
  )
  se <- sqrt(diag(output$var))
  data.frame(
    policy = policy_name,
    weib.speed = as.numeric(coef(output)[1]),
    weib.speed.se = as.numeric(se[1]),
    weib.scale = log(1 / output$scale),
    weib.scale.se = as.numeric(se[2]),
    stringsAsFactors = FALSE
  )
}

# one fit per policy, bound together once at the end
weib <- do.call(rbind, lapply(all.policies, fit_weibull_speed))

speeddata <- weib

# invert the intercept so that larger values mean faster diffusion,
# then min-max rescale to the [0, 1] interval
inverted_speed <- 1 - as.numeric(speeddata$weib.speed)
weib.rescale.speed <- data.frame(
  weib.rescale.speed = (inverted_speed - min(inverted_speed)) /
    (max(inverted_speed) - min(inverted_speed))
)

ULCspeed <- cbind(speeddata, weib.rescale.speed)

#optional: view and save speed data
View(ULCspeed)
write.csv(ULCspeed, file = "ULCspeed.csv")

#policy design coding
ULCdesign <- read_excel("ULC_design.xlsx")

#use these commands only if needing to read back in text or speed variables
#ULCsummary<-read.csv("ULCsummary.csv",header=TRUE,na="NA")
#ULCreadability<-read.csv("ULCreadability.csv",header=TRUE,na="NA")
#ULCspeed<-read.csv("ULCspeed.csv",header=TRUE,na="NA")

#merge results: start from the adoption data, add the text measures by
#document, then the design coding and speed scores by policy
ULCdata <- ULCpolicy %>%
  left_join(ULCreadability, by = "document") %>%
  left_join(ULCsummary, by = "document") %>%
  left_join(ULCdesign, by = "policy") %>%
  left_join(ULCspeed, by = "policy")

#create variables

#characters per sentence
ULCdata$cps <- as.numeric(ULCdata$chars) / as.numeric(ULCdata$sents)

#speed: rescaled Weibull score, set to 0 for policies with 2 or fewer adoptions
ULCdata$speed <- ULCdata$weib.rescale.speed
ULCdata$speed[ULCdata$adopt_count <= 2] <- 0

#complex_policy: 1 when the major topic is one of the listed areas, else 0
complex_topics <- c("Health", "Environment", "Energy",
                    "Macroeconomics", "Foreign Trade", "Public Lands")
ULCdata$complex_policy <- as.numeric(ULCdata$majortopic_str %in% complex_topics)

#limit analysis to no outliers:
#keep documents whose token count lies strictly within
#2 * IQR below the first quartile and 2 * IQR above the third
token_counts <- ULCdata$tokens
quartiles <- quantile(token_counts, probs = c(0.25, 0.75), na.rm = FALSE)
IQR <- stats::IQR(token_counts)
Lower <- quartiles[1] - 2 * IQR
Upper <- quartiles[2] + 2 * IQR
ULCdata_nout <- subset(ULCdata, token_counts > Lower & token_counts < Upper)
dim(ULCdata_nout)

#now omit those missing policy design elements
has_design_coding <- !is.na(ULCdata_nout$total_complex_design_subitems)
ULCdata_nout_nona <- ULCdata_nout[has_design_coding, ]
dim(ULCdata_nout_nona)

#and recode NAs to 0s for individual elements w/
#enough observations to analyze individually
zero_fill_columns <- c(
  "goals_efficiency", "goals_equity", "goals_welfare",
  "goals_security", "goals_liberty",
  "implement_topdown", "implement_network", "implement_bottomup",
  "socialconst_pospower", "socialconst_negpower",
  "socialconst_negweak", "socialconst_posweak"
)

for (design_col in zero_fill_columns) {
  element_values <- ULCdata_nout_nona[[design_col]]
  element_values[is.na(element_values)] <- 0
  ULCdata_nout_nona[[design_col]] <- element_values
}

#summary stats
#distribution of complexity measures
#figures in Appendix B

#Flesch reading difficulty
hist(ULCdata_nout_nona$frd,
     main = "",
     xlab = "Flesch reading difficulty",
     xlim = c(0, 100),
     ylim = c(0, 20))

#Dale-Chall reading difficulty
hist(ULCdata_nout_nona$Dale.Chall.old,
     main = "",
     xlab = "Dale-Chall reading difficulty",
     xlim = c(9.5, 13),
     ylim = c(0, 30))

#characters per sentence
hist(ULCdata_nout_nona$cps,
     main = "",
     xlab = "Characters per Sentence",
     xlim = c(100, 450),
     ylim = c(0, 25))

#total design elements
hist(ULCdata_nout_nona$total_complex_design_subitems,
     main = "",
     xlab = "Total Complex Design Elements",
     xlim = c(0, 8),
     ylim = c(0, 20))

#given distribution we bin 4 or more together:
#counts of 0-3 keep their value, 4 and above become 4,
#anything else stays NA
subitem_counts <- ULCdata_nout_nona$total_complex_design_subitems
binned_counts <- rep(NA, length(subitem_counts))
for (bin_value in c(0, 1, 2, 3)) {
  binned_counts[subitem_counts == bin_value] <- bin_value
}
binned_counts[subitem_counts >= 4] <- 4
ULCdata_nout_nona$total_complex_design_binned <- binned_counts

#reattach
#NOTE: attach() puts a snapshot of the data frame on the search path; the
#bare column names used by later tests resolve against that snapshot, so any
#later change to ULCdata_nout_nona is not seen until it is attached again.
attach(ULCdata_nout_nona)

#also we can visualize which elements tend to go together
#figure 3
ULCdesign_subelements <- read_excel("ULC_design_subelements_matrix.xlsx")

# pairwise correlations between the design sub-element columns
# (stored under a descriptive name rather than `c`, which shadows base::c)
design_subelement_cor <- cor(ULCdesign_subelements)

# corrplot is called through its namespace because the script's original
# library() list never attached the package.
# NOTE(review): corrplot releases from 0.90 onward are reported to have
# renamed `cl.lim` to `col.lim` - confirm against the installed version.
corrplot::corrplot(design_subelement_cor,
                   order = "hclust",
                   addrect = 4,
                   tl.col = "black",
                   tl.cex = 0.75,
                   cl.lim = c(-1, 1),
                   col = colorRampPalette(c("white", "gray", "black"))(200))

#correlations between textual complexity variables
#note correlations went down with new sample but still high
with(ULCdata_nout_nona, cor(cps, Dale.Chall.old)) #0.69
with(ULCdata_nout_nona, cor(cps, frd)) #0.87
with(ULCdata_nout_nona, cor(Dale.Chall.old, frd)) #0.81

#shared theme tweak: vertical x-axis labels so topic names do not overlap
rotate_x_labels <- theme(
  axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
)

#distribution of major topics
#figure 1
ggplot(ULCdata_nout_nona, aes(x = fct_infreq(majortopic_str))) +
  geom_bar() +
  labs(x = "Major Policy Topics") +
  rotate_x_labels

#but compare to all of SPID
spid <- read_dta("SPID_v1.2_policies.dta")
ggplot(spid, aes(x = fct_infreq(majortopic_str))) +
  geom_bar() +
  labs(x = "Major Policy Topics in SPID") +
  rotate_x_labels
#law and crime represented the most in both datasets
#but civil rights underrepresented and labor, social welfare,
#agriculture, defense, technology, foreign trade, immigration,
#and international affairs unrepresented.

#see also distribution of number of adopters and first year of adoption
#figure 2
ggplot(ULCdata_nout_nona, aes(x = adopt_count)) +
  geom_density() +
  labs(x = "Number of Adopters") +
  xlim(0, 50)

ggplot(ULCdata_nout_nona, aes(x = adopt_ulc)) +
  geom_density() +
  labs(x = "First Promulgated by ULC") +
  xlim(1950, 2022)

#correlation between total design complexity
#and textual complexity
#one-tailed Pearson tests (alternative = "greater": positive association).
#The bare variable names resolve through the attach() call made earlier.
#cor.test's default (x, y) method has no na.action argument, so
#na.action=na.omit is absorbed by `...`; incomplete pairs are dropped anyway.
cor.test(total_complex_design_binned,Dale.Chall.old, 
         alternative = c("greater"),
         na.action=na.omit)
#r = 0.22, df = 82, p = 0.021, one-tailed

cor.test(total_complex_design_binned,cps, 
         alternative = c("greater"),
         na.action=na.omit)
#r = 0.19, df = 82, p = 0.044, one-tailed

cor.test(total_complex_design_binned,frd,
         alternative = c("greater"),
         na.action=na.omit) 
#r = 0.18, df = 82, p = 0.052, one-tailed
#NOTE(review): this comment previously read "df=0.82, p=0.52"; corrected to
#match the other recorded results - confirm against the test output.

#boxplots for figure 4
complex_labs <- c("0 elements", "1", "2", "3", "4 or more")

# Boxplot of one text-complexity measure across the binned design-element
# count. The caption is computed from the same one-tailed correlation test
# reported in the text, so it cannot drift from the data. The previously
# hard-coded captions for the characters-per-sentence and Flesch plots were
# swapped relative to the recorded test results.
#   measure       : column name in ULCdata_nout_nona (character)
#   measure_label : y-axis label
design_boxplot <- function(measure, measure_label) {
  association <- cor.test(ULCdata_nout_nona$total_complex_design_binned,
                          ULCdata_nout_nona[[measure]],
                          alternative = "greater")
  caption_text <- sprintf("r = %.2f, p = %.3f, one-tailed",
                          association$estimate, association$p.value)

  ggplot(ULCdata_nout_nona,
         aes(group = total_complex_design_binned,
             x = total_complex_design_binned,
             y = .data[[measure]])) +
    geom_boxplot() +
    scale_x_continuous(breaks = c(0, 1, 2, 3, 4), labels = complex_labs) +
    xlab("Total Complex Design Elements") +
    ylab(measure_label) +
    labs(caption = caption_text) +
    theme(plot.caption = element_text(size = 12))
}

design_boxplot("cps", "Characters Per Sentence")

design_boxplot("Dale.Chall.old", "Dale-Chall Reading Difficulty")

design_boxplot("frd", "Flesch Reading Difficulty")

#ttests
#reported in table 2

# Welch (unequal-variance) two-sample t-test of a text-complexity measure
# between the two groups of a design indicator.
#   measure     : name of the complexity column (character)
#   indicator   : name of the two-level grouping column (character)
#   alternative : "less" tests group 0 < group 1; "greater" the reverse
design_ttest <- function(measure, indicator, alternative = "less") {
  test_formula <- as.formula(paste(measure, "~", indicator))
  t.test(test_formula,
         data = ULCdata_nout_nona,
         var.equal = FALSE,
         alternative = alternative)
}

##targets
t.target.dc <- design_ttest("Dale.Chall.old", "target_binary")
t.target.dc

t.target.frd <- design_ttest("frd", "target_binary")
t.target.frd

t.target.cps <- design_ttest("cps", "target_binary")
t.target.cps

##tools
t.tools.dc <- design_ttest("Dale.Chall.old", "tools_binary")
t.tools.dc

t.tools.cps <- design_ttest("cps", "tools_binary")
t.tools.cps

t.tools.frd <- design_ttest("frd", "tools_binary")
t.tools.frd

##goals
t.goals.dc <- design_ttest("Dale.Chall.old", "goals_binary")
t.goals.dc

t.goals.cps <- design_ttest("cps", "goals_binary")
t.goals.cps

t.goals.frd <- design_ttest("frd", "goals_binary")
t.goals.frd

##individual goals
#security
t.security.dc <- design_ttest("Dale.Chall.old", "goals_security")
t.security.dc

t.security.cps <- design_ttest("cps", "goals_security")
t.security.cps

t.security.frd <- design_ttest("frd", "goals_security")
t.security.frd

#results are in expected direction and significant.

#liberty
t.liberty.dc <- design_ttest("Dale.Chall.old", "goals_liberty")
t.liberty.dc

t.liberty.cps <- design_ttest("cps", "goals_liberty")
t.liberty.cps

t.liberty.frd <- design_ttest("frd", "goals_liberty")
t.liberty.frd

#liberty is associated with simpler language not more complex.
#Significantly so for cps and frd.

#efficiency (tested in the opposite direction)
t.efficiency.dc <- design_ttest("Dale.Chall.old", "goals_efficiency",
                                alternative = "greater")
t.efficiency.dc

t.efficiency.cps <- design_ttest("cps", "goals_efficiency",
                                 alternative = "greater")
t.efficiency.cps

t.efficiency.frd <- design_ttest("frd", "goals_efficiency",
                                 alternative = "greater")
t.efficiency.frd

#significantly less complex as expected by dale chall
#but not by other measures

#welfare
t.welfare.dc <- design_ttest("Dale.Chall.old", "goals_welfare")
t.welfare.dc

t.welfare.cps <- design_ttest("cps", "goals_welfare")
t.welfare.cps

t.welfare.frd <- design_ttest("frd", "goals_welfare")
t.welfare.frd

#more complex by two of the measures but not significantly so.

#equity
t.equity.dc <- design_ttest("Dale.Chall.old", "goals_equity")
t.equity.dc

t.equity.cps <- design_ttest("cps", "goals_equity")
t.equity.cps

t.equity.frd <- design_ttest("frd", "goals_equity")
t.equity.frd

#more complex on each measure but not significantly so.

##causal model
t.causal.dc <- design_ttest("Dale.Chall.old", "causal_binary")
t.causal.dc

t.causal.cps <- design_ttest("cps", "causal_binary")
t.causal.cps

t.causal.frd <- design_ttest("frd", "causal_binary")
t.causal.frd

##implementation
t.implement.dc <- design_ttest("Dale.Chall.old", "implement_binary")
t.implement.dc

t.implement.cps <- design_ttest("cps", "implement_binary")
t.implement.cps

t.implement.frd <- design_ttest("frd", "implement_binary")
t.implement.frd

#individual implementation strategies
#top-down
t.td.dc <- design_ttest("Dale.Chall.old", "implement_topdown")
t.td.dc

t.td.cps <- design_ttest("cps", "implement_topdown")
t.td.cps

t.td.frd <- design_ttest("frd", "implement_topdown")
t.td.frd
#top-down is associated with simpler language not more complex
#across all three measures
#but not significantly so.

#networked
t.net.dc <- design_ttest("Dale.Chall.old", "implement_network")
t.net.dc

t.net.cps <- design_ttest("cps", "implement_network")
t.net.cps

t.net.frd <- design_ttest("frd", "implement_network")
t.net.frd

#networked consistently more complex
#significant on dale chall

#bottom-up (tested in the opposite direction)
t.bu.dc <- design_ttest("Dale.Chall.old", "implement_bottomup",
                        alternative = "greater")
t.bu.dc

t.bu.cps <- design_ttest("cps", "implement_bottomup",
                         alternative = "greater")
t.bu.cps

t.bu.frd <- design_ttest("frd", "implement_bottomup",
                         alternative = "greater")
t.bu.frd
#bottom up is greater in complexity compared to topdown
#and networked only

##social construction of targets
t.socconst.dc <- design_ttest("Dale.Chall.old", "socialconst_binary")
t.socconst.dc

t.socconst.cps <- design_ttest("cps", "socialconst_binary")
t.socconst.cps

t.socconst.frd <- design_ttest("frd", "socialconst_binary")
t.socconst.frd

##correlations: complex and speed
##plots for figure 5 in manuscript
#Each panel follows the same recipe: a one-tailed Pearson test
#(alternative = "less": negative association), a scatterplot with a dashed
#red lowess curve, a blue least-squares line, and a text annotation.
#The annotation strings are typed in by hand, so they must be updated
#manually whenever the data change.
#Bare names in cor.test() resolve through the earlier attach() call.
cor.test(frd,speed,
         alternative = c("less"),
         na.action=na.omit) 
#r = -0.06, p = 0.28, one-tailed

plotLowess(speed~frd, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Speed of Adoption",
           xlab="Flesch Reading Difficulty")
abline(lm(speed~frd, data=ULCdata_nout_nona), col="blue")
text(x=40,y=0.2,'r = -0.06, p = 0.28, one-tailed', col="blue", cex=1.1)

cor.test(cps,speed,
         alternative = c("less"),
         na.action=na.omit) 
#r = -0.03, p = 0.41, one-tailed

plotLowess(speed~cps, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Speed of Adoption",
           xlab="Characters Per Sentence")
abline(lm(speed~cps, data=ULCdata_nout_nona), col="blue")
text(x=200,y=0.1,'r = -0.03, p = 0.41, one-tailed', col="blue", cex=1.1)

cor.test(Dale.Chall.old,speed,
         alternative = c("less"),
         na.action=na.omit) 
#r = -0.18, p = 0.05, one-tailed

plotLowess(speed~Dale.Chall.old, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Speed of Adoption",
           xlab="Dale-Chall Reading Difficulty")
abline(lm(speed~Dale.Chall.old, data=ULCdata_nout_nona), col="blue")
text(x=11,y=0.1,'r = -0.18, p = 0.05, one-tailed', col="blue", cex=1.1)

#design complexity and speed
#results recorded for the alternative specifications that were tried:
cor.test(ULCdata_nout_nona$total_complex_design_binned,
         ULCdata_nout_nona$speed,
         alternative = c("less"),
         na.action=na.omit) #-0.25, df=57, p=0.03, one-tailed
#using filled in speed: -0.24, df=82, p=0.02
#using subitems: -0.20, df=57, p=0.07
#using subitems and filled in speed: -0.23, df=82, p=0.02
#NOTE(review): the plot annotation below (r = -0.24, p = 0.02) matches the
#"filled in speed" line rather than the inline result - confirm which
#specification the code above currently produces.

plotLowess(speed~total_complex_design_binned, 
           data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Speed of Adoption",
           xlab="Total Complex Design Elements")
#disabled: custom x-axis labels for the binned counts
#xaxt='n')
#axis(side=1,at=c(0,1,2,3,4),labels=c("0 elements","1","2","3", "4 or more"))
abline(lm(speed~total_complex_design_binned, 
          data=ULCdata_nout_nona), col="blue")
text(x=2,y=0.1,'r = -0.24, p=0.02, one-tailed', col="blue", cex=1)



##correlations: complexity measures and adoptions
#figure 6 lowess plots
#Same recipe as the speed plots: one-tailed Pearson test
#(alternative = "less"), scatterplot with dashed red lowess curve, blue
#least-squares line, and a hand-typed annotation of the test result.
#The commented-out abline() calls would draw the fits for the full sample
#(ULCdata) and the outlier-trimmed sample (ULCdata_nout) for comparison.
cor.test(frd,adopt_count,
         alternative = c("less"),
         na.action=na.omit) 
#r = -0.08, p = 0.23, one-tailed

plotLowess(adopt_count~frd, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Number of Adopters",
           xlab="Flesch reading difficulty")
#abline(lm(adopt_count~frd, data=ULCdata), col="orange")
#abline(lm(adopt_count~frd, data=ULCdata_nout), col="blue")
abline(lm(adopt_count~frd, data=ULCdata_nout_nona), col="blue")
text(x=40,y=35,'r = -0.08, p = 0.23, one-tailed', col="blue", cex=1.1)

cor.test(cps,adopt_count,
         alternative = c("less"),
         na.action=na.omit) 
#r = -0.08, p = 0.22, one-tailed

plotLowess(adopt_count~cps, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Number of Adopters",
           xlab="Characters Per Sentence")
#abline(lm(adopt_count~cps, data=ULCdata), col="orange")
#abline(lm(adopt_count~cps, data=ULCdata_nout), col="blue")
abline(lm(adopt_count~cps, data=ULCdata_nout_nona), col="blue")
text(x=200,y=35,'r = -0.08, p = 0.22, one-tailed', col="blue", cex=1.1)

cor.test(Dale.Chall.old,adopt_count,
         alternative = c("less"),
         na.action=na.omit)
#r = -0.18, p = 0.04, one-tailed

plotLowess(adopt_count~Dale.Chall.old, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Number of Adopters",
           xlab="Dale-Chall reading difficulty")
#abline(lm(adopt_count~Dale.Chall.old, data=ULCdata), col="orange")
#abline(lm(adopt_count~Dale.Chall.old, data=ULCdata_nout), col="blue")
abline(lm(adopt_count~Dale.Chall.old, 
          data=ULCdata_nout_nona), col="blue")
text(x=11.25,y=35,'r = -0.18, p = 0.04, one-tailed', col="blue", cex=1.1)

cor.test(total_complex_design_binned,adopt_count,
         alternative = c("less"),
         na.action=na.omit)
#r = -0.02, p = 0.42, one-tailed

#the default x axis is suppressed (xaxt='n') and redrawn with bin labels
plotLowess(adopt_count~total_complex_design_binned, data=ULCdata_nout_nona,
           col.lowess="red", lty.lowess=2,
           ylab="Number of Adopters",
           xlab="Total Complex Design Elements",
           xaxt='n')
axis(side=1,at=c(0,1,2,3,4),labels=c("0 elements","1","2","3", "4 or more"))
#abline(lm(adopt_count~cps, data=ULCdata), col="orange")
#abline(lm(adopt_count~cps, data=ULCdata_nout), col="blue")
abline(lm(adopt_count~total_complex_design_binned, data=ULCdata_nout_nona), col="blue")
text(x=1.5,y=40,'r = -0.02, p = 0.42, one-tailed', col="blue", cex=1)


##finally compare to complex policy codes
##Appendix C

# widen the bottom margin so the rotated topic labels fit
par(mar = c(8, 5, 3, 3))

# Average a complexity measure within each major topic, sort the topics from
# highest to lowest mean, and draw the result as a bar chart.
#   values   : one value per row of ULCdata_nout_nona
#   y_label  : y-axis label
#   y_limits : y-axis range, c(min, max)
# Returns (invisibly) the sorted table with columns Group.1 (topic) and
# x (mean). Replaces four copies of the same code, the last of which ended
# its barplot() call with a stray trailing comma (an empty argument).
topic_mean_barplot <- function(values, y_label, y_limits) {
  topic_means <- aggregate(as.numeric(values),
                           list(ULCdata_nout_nona$majortopic_str),
                           FUN = mean)
  topic_means <- topic_means[order(topic_means$x, decreasing = TRUE), ]

  barplot(topic_means$x,
          beside = TRUE,
          names.arg = topic_means$Group.1,
          cex.names = 0.75,
          xlab = "",
          ylab = y_label,
          ylim = y_limits,
          axes = TRUE,
          las = 2)

  invisible(topic_means)
}

tab <- topic_mean_barplot(ULCdata_nout_nona$Dale.Chall.old,
                          "Dale Chall", c(0, 14))

tab2 <- topic_mean_barplot(ULCdata_nout_nona$frd,
                           "Flesch Reading Difficulty", c(0, 100))

tab3 <- topic_mean_barplot(ULCdata_nout_nona$cps,
                           "Chars Per Sent", c(0, 400))

tab4 <- topic_mean_barplot(ULCdata_nout_nona$total_complex_design_subitems,
                           "Complex Design Elements", c(0, 8))

# Appendix C regressions: adoption count and speed regressed on the
# complex-policy indicator (m1, m2) and on major-topic dummies (m3, m4)
m1 <- lm(adopt_count ~ complex_policy, data = ULCdata_nout_nona)
m2 <- lm(speed ~ complex_policy, data = ULCdata_nout_nona)
m3 <- lm(adopt_count ~ as.factor(majortopic_str), data = ULCdata_nout_nona)
m4 <- lm(speed ~ as.factor(majortopic_str), data = ULCdata_nout_nona)

# one table for all four models: standard errors shown next to the
# estimates instead of confidence intervals, stars at p < 0.1
tab_model(
  m1, m2, m3, m4,
  show.ci = FALSE,
  show.se = TRUE,
  collapse.se = TRUE,
  p.style = "stars",
  p.threshold = 0.1
)

#robustness check - removing notes and comments
#Appendix D

### text data with comments and notes removed
ULCtexts.alt <- readtext("ULC Texts Alt/*.txt")
ULCpdfs.alt <- readtext("ULC PDFs Alt/*.pdf")

### one corpus per source, then combined
ULCcorp1a <- corpus(ULCtexts.alt)
ULCcorp2a <- corpus(ULCpdfs.alt)
ULCcorpus.alt <- corpus(ULCcorp1a + ULCcorp2a)

### same cleaning as the main corpus: drop numbers, symbols, and URLs
ULCtokens.alt <- tokens(
  ULCcorpus.alt,
  remove_numbers = TRUE,
  remove_symbols = TRUE,
  remove_url = TRUE
)

#then move back to corpus format (overwrites the raw alternate corpus)
ULCcorpus.alt <- corpus(
  vapply(
    ULCtokens.alt,
    function(doc_tokens) paste(doc_tokens, collapse = " "),
    character(1)
  )
)

### summary and readability statistics for the alternate texts
ULCsummary.alt <- textstat_summary(ULCcorpus.alt)
ULCread.alt <- textstat_readability(
  ULCcorpus.alt,
  measure = c("Flesch", "Dale.Chall.old")
)

# reading difficulty: highest Flesch ease maps to 0, lowest to 100
flesch_hi.alt <- max(ULCread.alt$Flesch)
flesch_lo.alt <- min(ULCread.alt$Flesch)
ULCread.alt$frd <- abs(
  100 - ((100 / (flesch_hi.alt - flesch_lo.alt)) *
           (ULCread.alt$Flesch - flesch_hi.alt) + 100)
)

# merge the alternate text measures with the adoption, design, and speed data
ULCdata.alt <- left_join(ULCpolicy, ULCread.alt, by = c("document"))
ULCdata.alt <- left_join(ULCdata.alt, ULCsummary.alt, by = c("document"))
ULCdata.alt <- left_join(ULCdata.alt, ULCdesign, by = c("policy"))
ULCdata.alt <- left_join(ULCdata.alt, ULCspeed, by = c("policy"))

# characters per sentence
ULCdata.alt$cps <- (as.numeric(ULCdata.alt$chars) / as.numeric(ULCdata.alt$sents))

# speed: rescaled Weibull score, 0 for policies with 2 or fewer adoptions
ULCdata.alt$speed <- ULCdata.alt$weib.rescale.speed
ULCdata.alt$speed[ULCdata.alt$adopt_count <= 2] <- 0

# complex-policy indicator by major topic
ULCdata.alt$complex_policy <- 0
ULCdata.alt$complex_policy[ULCdata.alt$majortopic_str == "Health"] <- 1
ULCdata.alt$complex_policy[ULCdata.alt$majortopic_str == "Environment"] <- 1
ULCdata.alt$complex_policy[ULCdata.alt$majortopic_str == "Energy"] <- 1
ULCdata.alt$complex_policy[ULCdata.alt$majortopic_str == "Macroeconomics"] <- 1
ULCdata.alt$complex_policy[ULCdata.alt$majortopic_str == "Foreign Trade"] <- 1
ULCdata.alt$complex_policy[ULCdata.alt$majortopic_str == "Public Lands"] <- 1

# limit analysis to no outliers.
# The script previously used `ULCdata.alt_nout` without ever creating it, so
# this section stopped with "object not found". The step is rebuilt here with
# the same 2 * IQR token-count rule applied to the main sample.
# NOTE(review): thresholds are computed from the alternate token counts;
# confirm whether the published analysis instead reused the documents kept
# in the main sample.
alt_quartiles <- quantile(ULCdata.alt$tokens, probs = c(.25, .75), na.rm = FALSE)
alt_iqr <- stats::IQR(ULCdata.alt$tokens)
alt_lower <- alt_quartiles[1] - 2 * alt_iqr
alt_upper <- alt_quartiles[2] + 2 * alt_iqr
ULCdata.alt_nout <- subset(ULCdata.alt,
                           ULCdata.alt$tokens > alt_lower &
                             ULCdata.alt$tokens < alt_upper)
dim(ULCdata.alt_nout)

# now omit those missing policy design elements
ULCdata.alt_nout_nona <- ULCdata.alt_nout[!is.na(ULCdata.alt_nout$total_complex_design_subitems), ]
dim(ULCdata.alt_nout_nona)

# bin the design-element count: 0-3 keep their value, 4 and above become 4,
# anything else stays NA
alt_subitem_counts <- ULCdata.alt_nout_nona$total_complex_design_subitems
alt_binned_counts <- rep(NA, length(alt_subitem_counts))
for (bin_value in c(0, 1, 2, 3)) {
  alt_binned_counts[alt_subitem_counts == bin_value] <- bin_value
}
alt_binned_counts[alt_subitem_counts >= 4] <- 4
ULCdata.alt_nout_nona$total_complex_design_binned <- alt_binned_counts

# from here on, bare column names resolve to the alternate sample: it is
# attached after the main sample and therefore masks it on the search path
attach(ULCdata.alt_nout_nona)

#appears to reduce some noise between measures of textual complexity
with(ULCdata.alt_nout_nona, cor(cps, Dale.Chall.old)) #similar
with(ULCdata.alt_nout_nona, cor(cps, frd)) #stronger
with(ULCdata.alt_nout_nona, cor(Dale.Chall.old, frd)) #weaker

#design complexity vs. textual complexity on the alternate texts
#one-tailed Pearson tests (alternative = "greater"). Bare names resolve to
#the most recently attached data frame. "original" = result with the full
#texts; "new" = result with notes and comments removed.
#NOTE(review): the "original" values recorded here differ slightly from
#those noted next to the main-sample tests (e.g. 0.21 / p=0.025 here vs.
#0.22 / p=0.021 there) - confirm which set is current.
cor.test(total_complex_design_binned,Dale.Chall.old, 
         alternative = c("greater"),
         na.action=na.omit) 
#original = 0.21, df = 82, p=0.025, one-tailed
#new = 0.18, df = 82, p=0.053, one-tailed

cor.test(total_complex_design_binned,cps, 
         alternative = c("greater"),
         na.action=na.omit) 
#original = 0.19, df=82, p=0.048, one-tailed
#new = 0.19, df = 82, p=0.039, one-tailed

cor.test(total_complex_design_binned,frd,
         alternative = c("greater"),
         na.action=na.omit)
#original = 0.18, df=82, p=0.053, one-tailed
#new = 0.18, df = 82, p=0.055, one-tailed

# Welch (unequal-variance) two-sample t-test on the alternate sample.
# The data frame is passed explicitly, so the result does not depend on which
# data frame was attached last. Every test is also spelled the same way; the
# first one previously used `$` references while the rest used bare names.
#   measure     : name of the complexity column (character)
#   indicator   : name of the two-level grouping column (character)
#   alternative : "less" tests group 0 < group 1
# NOTE: the t.* objects below overwrite the main-sample results of the same
# name.
alt_design_ttest <- function(measure, indicator, alternative = "less") {
  test_formula <- as.formula(paste(measure, "~", indicator))
  t.test(test_formula,
         data = ULCdata.alt_nout_nona,
         var.equal = FALSE,
         alternative = alternative)
}

##targets
t.target.dc <- alt_design_ttest("Dale.Chall.old", "target_binary")
t.target.dc
#same results

t.target.frd <- alt_design_ttest("frd", "target_binary")
t.target.frd
#same results

t.target.cps <- alt_design_ttest("cps", "target_binary")
t.target.cps
#same results

##tools
t.tools.dc <- alt_design_ttest("Dale.Chall.old", "tools_binary")
t.tools.dc
#same results

t.tools.cps <- alt_design_ttest("cps", "tools_binary")
t.tools.cps
#same results

t.tools.frd <- alt_design_ttest("frd", "tools_binary")
t.tools.frd
#sig at 0.1

##goals
t.goals.dc <- alt_design_ttest("Dale.Chall.old", "goals_binary")
t.goals.dc
#in opposite direction

t.goals.cps <- alt_design_ttest("cps", "goals_binary")
t.goals.cps
#in opposite direction

t.goals.frd <- alt_design_ttest("frd", "goals_binary")
t.goals.frd
#in opposite direction

##causal model
t.causal.dc <- alt_design_ttest("Dale.Chall.old", "causal_binary")
t.causal.dc
#same results

t.causal.cps <- alt_design_ttest("cps", "causal_binary")
t.causal.cps
#same results

t.causal.frd <- alt_design_ttest("frd", "causal_binary")
t.causal.frd
#sig at p<0.1

##implementation
t.implement.dc <- alt_design_ttest("Dale.Chall.old", "implement_binary")
t.implement.dc
#same results

t.implement.cps <- alt_design_ttest("cps", "implement_binary")
t.implement.cps
#same results

t.implement.frd <- alt_design_ttest("frd", "implement_binary")
t.implement.frd
#same results

##social construction of targets
t.socconst.dc <- alt_design_ttest("Dale.Chall.old", "socialconst_binary")
t.socconst.dc
#same results

t.socconst.cps <- alt_design_ttest("cps", "socialconst_binary")
t.socconst.cps
#same results

t.socconst.frd <- alt_design_ttest("frd", "socialconst_binary")
t.socconst.frd
#same results

# textual complexity vs. number of adopters, alternate texts
# (one-tailed: more complexity, fewer adoptions)
with(ULCdata.alt_nout_nona,
     cor.test(frd, adopt_count, alternative = "less", na.action = na.omit))
#complexity up, adoptions down, n.s.
#about the same as main results but still n.s.

with(ULCdata.alt_nout_nona,
     cor.test(cps, adopt_count, alternative = "less", na.action = na.omit))
#complexity up, adoptions down, n.s.
#slightly weaker than main results but still n.s.

with(ULCdata.alt_nout_nona,
     cor.test(Dale.Chall.old, adopt_count, alternative = "less", na.action = na.omit))
#complexity up, adoptions down, sig.
#stronger than main results at r = -0.26

# textual complexity vs. speed of adoption, alternate texts
# (one-tailed: more complexity, slower adoption)
with(ULCdata.alt_nout_nona,
     cor.test(frd, speed, alternative = "less", na.action = na.omit))
#complexity up, speed down, n.s.
#about the same as main results

with(ULCdata.alt_nout_nona,
     cor.test(cps, speed, alternative = "less", na.action = na.omit))
#complexity up, speed down, n.s.
#weaker than main results

with(ULCdata.alt_nout_nona,
     cor.test(Dale.Chall.old, speed, alternative = "less", na.action = na.omit))
#complexity up, speed down, sig
#slightly weaker than main results



#################end#######################

